Skip to content

Commit 7b31d81

Browse files
committed
Add index field to Position
1 parent 3a7565b commit 7b31d81

File tree

6 files changed

+73
-71
lines changed

6 files changed

+73
-71
lines changed

β€ŽCHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ Breaking changes:
2727
without causing issues with `<$>`.
2828
- Rename module prefix from `Text.Parsing.Parser` to `Parsing` (#169 by @jamesdbrock)
2929
- Delete the `regex` parser and replace it with `mkRegex`. (#170 by @jamesdbrock)
30+
- Add the `index` field to `Position`. (#171 by @jamesdbrock)
3031

3132
New features:
3233

β€Žbench/Main.purs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ import Bench.Json.Parsing as BenchParsing
5757
import Bench.Json.StringParser as BenchStringParser
5858
import Bench.Json.TestData (largeJson, mediumJson, smallJson)
5959
import Control.Monad.Trampoline (runTrampoline)
60-
import Control.Monad.Free (liftF)
6160
import Data.Array (fold, replicate)
6261
import Data.Either (either)
6362
import Data.List (many, manyRec)

β€Žsrc/Parsing/Indent.purs

Lines changed: 13 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -85,18 +85,6 @@ get' = do
8585
put' :: forall s. Position -> IndentParser s Unit
8686
put' p = lift (put p)
8787

88-
sourceColumn :: Position -> Int
89-
sourceColumn (Position { line: _, column: c }) = c
90-
91-
sourceLine :: Position -> Int
92-
sourceLine (Position { line: l, column: _ }) = l
93-
94-
setSourceLine :: Position -> Int -> Position
95-
setSourceLine (Position { line: _, column: c }) l = Position { line: l, column: c }
96-
97-
biAp :: forall a b c. (a -> b) -> (b -> b -> c) -> a -> a -> c
98-
biAp f c v1 v2 = c (f v1) (f v2)
99-
10088
many1 :: forall s m a. ParserT s m a -> ParserT s m (List a)
10189
many1 p = lift2 Cons p (many p)
10290

@@ -121,19 +109,17 @@ withBlock' = withBlock (flip const)
121109
-- | Parses only when indented past the level of the reference
122110
indented :: forall s. IndentParser s Unit
123111
indented = do
124-
pos <- position
125-
s <- get'
126-
if biAp sourceColumn (<=) pos s then fail "not indented"
127-
else do
128-
put' $ setSourceLine s (sourceLine pos)
129-
pure unit
112+
Position p <- position
113+
Position s <- get'
114+
if p.column <= s.column then fail "not indented"
115+
else put' $ Position { index: 0, line: p.line, column: s.column }
130116

131117
-- | Same as `indented`, but does not change internal state
132118
indented' :: forall s. IndentParser s Unit
133119
indented' = do
134-
pos <- position
135-
s <- get'
136-
if biAp sourceColumn (<=) pos s then fail "not indented" else pure unit
120+
Position p <- position
121+
Position s <- get'
122+
if p.column <= s.column then fail "not indented" else pure unit
137123

138124
-- | Parses only when indented past the level of the reference or on the same line
139125
sameOrIndented :: forall s. IndentParser s Unit
@@ -142,9 +128,9 @@ sameOrIndented = sameLine <|> indented
142128
-- | Parses only on the same line as the reference
143129
sameLine :: forall s. IndentParser s Unit
144130
sameLine = do
145-
pos <- position
146-
s <- get'
147-
if biAp sourceLine (==) pos s then pure unit else fail "over one line"
131+
Position p <- position
132+
Position s <- get'
133+
if p.line == s.line then pure unit else fail "over one line"
148134

149135
-- | Parses a block of lines at the same indentation level
150136
block1 :: forall s a. IndentParser s a -> IndentParser s (List a)
@@ -169,9 +155,9 @@ withPos x = do
169155
-- | Ensures the current indentation level matches that of the reference
170156
checkIndent :: forall s. IndentParser s Unit
171157
checkIndent = do
172-
s <- get'
173-
p <- position
174-
if biAp sourceColumn (==) p s then pure unit else fail "indentation doesn't match"
158+
Position p <- position
159+
Position s <- get'
160+
if p.column == s.column then pure unit else fail "indentation doesn't match"
175161

176162
-- | Run the result of an indentation sensitive parse
177163
runIndent :: forall a. State Position a -> a

β€Žsrc/Parsing/Pos.purs

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,30 @@ module Parsing.Pos where
33
import Prelude
44

55
import Data.Generic.Rep (class Generic)
6+
import Data.Show.Generic (genericShow)
67

78
-- | `Position` represents the position of the parser in the input.
89
-- |
9-
-- | - `line` is the current line in the input
10-
-- | - `column` is the column of the next character in the current line that will be parsed
10+
-- | - `index` is the position since the start of the input. Starts at 0.
11+
-- | - `line` is the current line in the input. Starts at 1.
12+
-- | - `column` is the column of the next character in the current line that
13+
-- | will be parsed. Starts at 1.
1114
newtype Position = Position
12-
{ line :: Int
15+
{ index :: Int
16+
, line :: Int
1317
, column :: Int
1418
}
1519

16-
derive instance genericPosition :: Generic Position _
20+
derive instance Generic Position _
21+
instance Show Position where
22+
show x = genericShow x
1723

18-
instance showPosition :: Show Position where
19-
show (Position { line: line, column: column }) =
20-
"(Position { line: " <> show line <> ", column: " <> show column <> " })"
24+
instance Eq Position where
25+
eq (Position l) (Position r) = l.index == r.index
2126

22-
derive instance eqPosition :: Eq Position
23-
derive instance ordPosition :: Ord Position
27+
instance Ord Position where
28+
compare (Position l) (Position r) = compare l.index r.index
2429

2530
-- | The `Position` before any input has been parsed.
2631
initialPos :: Position
27-
initialPos = Position { line: 1, column: 1 }
32+
initialPos = Position { index: 0, line: 1, column: 1 }

β€Žsrc/Parsing/String.purs

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,20 @@
1919
-- | The other primitive parsers, which return `CodePoint` and `String` types,
2020
-- | can parse the full Unicode character set. All of the primitive parsers
2121
-- | in this module can be used together.
22+
-- |
23+
-- | ### Position
24+
-- |
25+
-- | In a `String` parser, the `Position {index}` counts the number of
26+
-- | unicode `CodePoint`s since the beginning of the input string.
27+
-- |
28+
-- | Each tab character (`0x09`) encountered in a `String` parser will advance
29+
-- | the `Position {column}` by 8.
30+
-- |
31+
-- | These patterns will advance the `Position {line}` by 1 and reset
32+
-- | the `Position {column}` to 1:
33+
-- | - newline (`0x0A`)
34+
-- | - carriage-return (`0x0D`)
35+
-- | - carriage-return-newline (`0x0D 0x0A`)
2236
module Parsing.String
2337
( string
2438
, eof
@@ -187,14 +201,14 @@ updatePosString pos before after = case uncons before of
187201
-- | Updates a `Position` by adding the columns and lines in a
188202
-- | single `CodePoint`.
189203
updatePosSingle :: Position -> CodePoint -> String -> Position
190-
updatePosSingle (Position { line, column }) cp after = case fromEnum cp of
191-
10 -> Position { line: line + 1, column: 1 } -- "\n"
204+
updatePosSingle (Position { index, line, column }) cp after = case fromEnum cp of
205+
10 -> Position { index: index + 1, line: line + 1, column: 1 } -- "\n"
192206
13 ->
193207
case codePointAt 0 after of
194-
Just nextCp | fromEnum nextCp == 10 -> Position { line, column } -- "\r\n" lookahead
195-
_ -> Position { line: line + 1, column: 1 } -- "\r"
196-
9 -> Position { line, column: column + 8 - ((column - 1) `mod` 8) } -- "\t" Who says that one tab is 8 columns?
197-
_ -> Position { line, column: column + 1 }
208+
Just nextCp | fromEnum nextCp == 10 -> Position { index: index + 1, line, column } -- "\r\n" lookahead
209+
_ -> Position { index: index + 1, line: line + 1, column: 1 } -- "\r"
210+
9 -> Position { index: index + 1, line, column: column + 8 - ((column - 1) `mod` 8) } -- "\t" Who says that one tab is 8 columns?
211+
_ -> Position { index: index + 1, line, column: column + 1 }
198212

199213
-- | Combinator which returns both the result of a parse and the slice of
200214
-- | the input that was consumed while it was being parsed.

β€Žtest/Main.purs

Lines changed: 24 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ stackSafeLoopsTest = do
128128
parseErrorTestPosition
129129
(many1TillRec (string "a") (string "b"))
130130
"baa"
131-
(Position { line: 1, column: 1 })
131+
(Position { index: 0, line: 1, column: 1 })
132132

133133
parseTest "a,a,a,b,a,a" (toUnfoldable [ "a", "a", "a" ]) $
134134
sepEndByRec (string "a") (string ",")
@@ -144,7 +144,7 @@ stackSafeLoopsTest = do
144144
parseErrorTestPosition
145145
(sepEndBy1Rec (string "a") (string ","))
146146
"b,a,a"
147-
(Position { line: 1, column: 1 })
147+
(Position { index: 0, line: 1, column: 1 })
148148

149149
-- 8 `div` (8 `div` 2) == 2
150150
parseTest "8x8x2" 2 $
@@ -156,7 +156,7 @@ stackSafeLoopsTest = do
156156
parseErrorTestPosition
157157
(chainr1Rec digit (string "x" $> div))
158158
""
159-
(Position { line: 1, column: 1 })
159+
(Position { index: 0, line: 1, column: 1 })
160160

161161
-- (8 `div` 2) `div` 2 == 2
162162
parseTest "8x2x2" 2 $
@@ -168,15 +168,15 @@ stackSafeLoopsTest = do
168168
parseErrorTestPosition
169169
(chainl1Rec digit (string "x" $> div))
170170
""
171-
(Position { line: 1, column: 1 })
171+
(Position { index: 0, line: 1, column: 1 })
172172

173173
parseTest "aaaabcd" "b"
174174
$ skipMany1Rec (string "a")
175175
*> string "b"
176176
parseErrorTestPosition
177177
(skipMany1Rec (string "a"))
178178
"bcd"
179-
(Position { line: 1, column: 1 })
179+
(Position { index: 0, line: 1, column: 1 })
180180

181181
parseTest "aaaabcd" "b"
182182
$ skipManyRec (string "a")
@@ -190,7 +190,7 @@ stackSafeLoopsTest = do
190190
parseErrorTestPosition
191191
(many1Rec (string "a"))
192192
""
193-
(Position { line: 1, column: 1 })
193+
(Position { index: 0, line: 1, column: 1 })
194194

195195
parseTest "a,a,ab" (toUnfoldable [ "a", "a", "a" ])
196196
$ sepByRec (string "a") (string ",")
@@ -204,11 +204,11 @@ stackSafeLoopsTest = do
204204
parseErrorTestPosition
205205
(sepBy1Rec (string "a") (string ","))
206206
""
207-
(Position { line: 1, column: 1 })
207+
(Position { index: 0, line: 1, column: 1 })
208208
parseErrorTestPosition
209209
(sepBy1Rec (string "a") (string ","))
210210
"a,"
211-
(Position { line: 1, column: 3 })
211+
(Position { index: 2, line: 1, column: 3 })
212212

213213
parseTest "a,a,a,b" (toUnfoldable [ "a", "a", "a" ])
214214
$ endByRec (string "a") (string ",")
@@ -222,11 +222,11 @@ stackSafeLoopsTest = do
222222
parseErrorTestPosition
223223
(endBy1Rec (string "a") (string ","))
224224
""
225-
(Position { line: 1, column: 1 })
225+
(Position { index: 0, line: 1, column: 1 })
226226
parseErrorTestPosition
227227
(endBy1Rec (string "a") (string ","))
228228
"a,a"
229-
(Position { line: 1, column: 4 })
229+
(Position { index: 3, line: 1, column: 4 })
230230

231231
data TestToken = A | B
232232

@@ -247,10 +247,7 @@ testTokenParser :: TokenParser
247247
testTokenParser = makeTokenParser haskellDef
248248

249249
mkPos :: Int -> Position
250-
mkPos n = mkPos' n 1
251-
252-
mkPos' :: Int -> Int -> Position
253-
mkPos' column line = Position { column: column, line: line }
250+
mkPos n = Position { index: n - 1, line: 1, column: n }
254251

255252
type TestM = Effect Unit
256253

@@ -577,12 +574,12 @@ main = do
577574
parseErrorTestPosition
578575
(many $ char 'f' *> char '?')
579576
"foo"
580-
(Position { column: 2, line: 1 })
577+
(Position { index: 1, column: 2, line: 1 })
581578

582579
parseErrorTestPosition
583580
(satisfy (_ == '?'))
584581
"foo"
585-
(Position { column: 1, line: 1 })
582+
(Position { index: 0, column: 1, line: 1 })
586583

587584
parseTest
588585
"foo"
@@ -607,17 +604,17 @@ main = do
607604

608605
parseTest "rest" "rest" rest
609606
parseTest "rest" unit (rest *> eof)
610-
parseTest "rest\nrest" (Position { line: 2, column: 5 }) (rest *> position)
607+
parseTest "rest\nrest" (Position { index: 9, line: 2, column: 5 }) (rest *> position)
611608

612609
parseErrorTestPosition
613610
(rest *> notFollowedBy eof)
614611
"aa\naa"
615-
(Position { column: 3, line: 2 })
612+
(Position { index: 5, column: 3, line: 2 })
616613

617614
parseErrorTestPosition
618-
anyChar
619-
"𝅑"
620-
(Position { column: 1, line: 1 })
615+
(string "π…Ÿπ… " *> string "π…Ÿπ… ")
616+
"π…Ÿπ… x𝅑"
617+
(Position { index: 2, column: 3, line: 1 })
621618

622619
parseTest "π…Ÿπ… x𝅑" [ "π…Ÿ", "𝅘𝅥𝅮", "x", "𝅑" ] do
623620
quarter <- anyCodePoint
@@ -633,8 +630,8 @@ main = do
633630

634631
parseTest "abcd" "ab" $ takeN 2
635632
parseTest "abcd" "" $ takeN 0
636-
parseErrorTestPosition (takeN 10) "abcd" (Position { column: 1, line: 1 })
637-
parseErrorTestPosition (takeN (-1)) "abcd" (Position { column: 1, line: 1 })
633+
parseErrorTestPosition (takeN 10) "abcd" (Position { index: 0, column: 1, line: 1 })
634+
parseErrorTestPosition (takeN (-1)) "abcd" (Position { index: 0, column: 1, line: 1 })
638635

639636
parseErrorTestMessage
640637
(noneOfCodePoints $ SCP.toCodePointArray "β“βœ…")
@@ -677,10 +674,10 @@ main = do
677674
parseTest "ababab" [ 'b', 'b', 'b' ] $ Array.many (char 'a' *> char 'b')
678675
parseTest "abaXab" [ 'b' ] $ Array.many (try (char 'a' *> char 'b'))
679676

680-
parseErrorTestPosition (string "abc") "bcd" (Position { column: 1, line: 1 })
681-
parseErrorTestPosition (string "abc" *> eof) "abcdefg" (Position { column: 4, line: 1 })
682-
parseErrorTestPosition (string "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { column: 1, line: 4 })
683-
parseErrorTestPosition (string "\ta" *> eof) "\tab" (Position { column: 10, line: 1 })
677+
parseErrorTestPosition (string "abc") "bcd" (Position { index: 0, column: 1, line: 1 })
678+
parseErrorTestPosition (string "abc" *> eof) "abcdefg" (Position { index: 3, column: 4, line: 1 })
679+
parseErrorTestPosition (string "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { index: 6, column: 1, line: 4 })
680+
parseErrorTestPosition (string "\ta" *> eof) "\tab" (Position { index: 2, column: 10, line: 1 })
684681

685682
log "\nTESTS number\n"
686683

0 commit comments

Comments
Β (0)