From 2b4757494d23851b8d71aedc18a048a3e5dd6c46 Mon Sep 17 00:00:00 2001 From: Volodya Lombrozo Date: Wed, 16 Oct 2024 02:38:14 +0300 Subject: [PATCH] feat(#4244): add README.md and examples of http request. Also fixed the (#4285) ambiguity in the grammar. --- http/README.md | 9 + http/examples/options.http | 2 + http/examples/ping.http | 2 + http/{test.http => examples/post.http} | 2 - http/examples/trace.http | 4 + http/http.g4 | 401 ++++++++++++------------- http/pom.xml | 2 + http/testHeader.http | 1 - 8 files changed, 209 insertions(+), 214 deletions(-) create mode 100755 http/README.md create mode 100644 http/examples/options.http create mode 100644 http/examples/ping.http rename http/{test.http => examples/post.http} (99%) create mode 100644 http/examples/trace.http delete mode 100644 http/testHeader.http diff --git a/http/README.md b/http/README.md new file mode 100755 index 0000000000..2d47f8269a --- /dev/null +++ b/http/README.md @@ -0,0 +1,9 @@ +# HTTP Grammar + +An ANTLR4 grammar for [HTTP](https://www.rfc-editor.org/rfc/rfc7230). +This grammar is based on the HTTP/1.1 specification (RFC 7230, RFC 7231) +and closely follows +the [ABNF](https://www.rfc-editor.org/rfc/rfc7230#appendix-B) format. + +Currently, the grammar is incomplete and only supports the request line and +headers. \ No newline at end of file diff --git a/http/examples/options.http b/http/examples/options.http new file mode 100644 index 0000000000..8ab23e8c1b --- /dev/null +++ b/http/examples/options.http @@ -0,0 +1,2 @@ +OPTIONS /&&&//&? HTTP/9.1 +%%: diff --git a/http/examples/ping.http b/http/examples/ping.http new file mode 100644 index 0000000000..d96d895357 --- /dev/null +++ b/http/examples/ping.http @@ -0,0 +1,2 @@ +PING /google.com HTTP/1.1 +Host: www.google.com.hk diff --git a/http/test.http b/http/examples/post.http similarity index 99% rename from http/test.http rename to http/examples/post.http index b47c8b9a98..a7cdb559a3 100644 --- a/http/test.http +++ b/http/examples/post.http @@ -13,5 +13,3 @@ X-Client-Data: CIi2yQEIorbJAQjBtskBCKmdygEIqKPKAQjwpMoBCLGnygEI4qjKAQjxqcoBCK+sy Accept-Encoding: gzip, deflate Accept-Language: zh-CN,zh;q=0.9,en;q=0.8 Cookie: NID=184=VqX86iUz6p-H_b2qbuogwjkmsk096DB-48jilOI9Pquzq8WT-aRbKsaH8UnMfvF9uHtuUtHhnJ7Z3F74bcpMNstJ5ADYV_tv09sXOJiwf3Yu-xsZ1E588v2tX6zA-J4K6c1t6t_PQP3jvtbVSdqw_YJqgU1elwvqkjzj0kBbk0I; 1P_JAR=2019-05-26-05; DV=42xzl48Lt5gpEFuauBIUhN0LQjoor5YtIbbBr4x5AQIAAAA - -PING diff --git a/http/examples/trace.http b/http/examples/trace.http new file mode 100644 index 0000000000..f426b29fd5 --- /dev/null +++ b/http/examples/trace.http @@ -0,0 +1,4 @@ +TRACE /&/&&//&&&/&&&&?/// HTTP/8.1 +`````:C D +``: #### x y z +```:!!! diff --git a/http/http.g4 b/http/http.g4 index 429e9b441a..5b2bad2b14 100644 --- a/http/http.g4 +++ b/http/http.g4 @@ -3,6 +3,7 @@ HTTP grammar. The MIT License (MIT). Copyright (c) 2024, Martin Mirchev. +Copyright (c) 2024, Volodya Lombrozo. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -24,104 +25,94 @@ THE SOFTWARE. // $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false // $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging +/* +* This grammar is based on the HTTP/1.1 specification (RFC 7230, RFC 7231). +*/ grammar http; /* - HTTP-message = start‑line ( header‑field  CRLF ) CRLF [ message‑body ] + HTTP-message = start‑line *( header‑field CRLF ) CRLF [ message‑body ] */ http_message - : start_line (header_field CRLF)* CRLF EOF //message_body + : start_line (header_field CRLF)* EOF // CRLF message_body ; /* - start-line = request‑line / status‑line + start-line = request‑line / status‑line */ start_line - : request_line + : request_line // status_line ; /* - request-line = method  SP  request‑target  SP  HTTP‑version  CRLF + request-line = method SP request‑target SP HTTP‑version CRLF */ request_line : method SP request_target SP http_version CRLF ; /* - method = token ; "GET" ; → RFC 7231 – Section 4.3.1 ; "HEAD" ; → RFC 7231 – Section 4.3.2 ; "POST" - ; → RFC 7231 – Section 4.3.3 ; "PUT" ; → RFC 7231 – Section 4.3.4 ; "DELETE" ; → RFC 7231 – Section - 4.3.5 ; "CONNECT" ; → RFC 7231 – Section 4.3.6 ; "OPTIONS" ; → RFC 7231 – Section 4.3.7 ; "TRACE" - ; → RFC 7231 – Section 4.3.8 + method = token ; "GET" ; → RFC 7231 – Section 4.3.1 ; "HEAD" ; → RFC 7231 – Section 4.3.2 ; "POST" + ; → RFC 7231 – Section 4.3.3 ; "PUT" ; → RFC 7231 – Section 4.3.4 ; "DELETE" ; → RFC 7231 – Section + 4.3.5 ; "CONNECT" ; → RFC 7231 – Section 4.3.6 ; "OPTIONS" ; → RFC 7231 – Section 4.3.7 ; "TRACE" + ; → RFC 7231 – Section 4.3.8 */ method - : 'GET' - | 'HEAD' - | 'POST' - | 'PUT' - | 'DELETE' - | 'CONNECT' - | 'OPTIONS' - | 'TRACE' + : token ; /* - request-target = origin-form / absolute-form / authority-form / asterisk-form + request-target = origin-form / absolute-form / authority-form / asterisk-form */ request_target - : origin_form + : origin_form // absolute_form | authority_form | asterisk_form ; /* - origin-form = absolute-path  [ "?"  query ] + origin-form = absolute-path [ "?" query ] */ origin_form - : absolute_path (QuestionMark query)? + : absolute_path ('?' query)? ; /* - absolute-path = 1*( "/"  segment ) + absolute-path = 1*( "/" segment ) */ absolute_path - : (Slash segment)+ + : ('/' segment)+ ; /* - segment = pchar + segment = *pchar */ segment : pchar* ; /* - query = ( pchar /  "/" /  "?" ) + query = *( pchar / "/" / "?" ) */ query - : (pchar | Slash | QuestionMark)* + : (pchar | '/' | '?')* ; /* - HTTP-version = HTTP-name '/' DIGIT  "."  DIGIT + HTTP-version = HTTP-name '/' DIGIT "." DIGIT + HTTP-name = %x48.54.54.50 ; "HTTP", case-sensitive */ http_version - : http_name DIGIT Dot DIGIT - ; - -/* - HTTP-name = %x48.54.54.50 ; "HTTP", case-sensitive - */ -http_name - : 'HTTP/' + : 'HTTP' '/' DIGIT '.' DIGIT ; /* - header-field = field-name  ":"  OWS  field-value  OWS  + header-field = field-name ":" OWS field-value OWS */ header_field - : field_name Colon OWS* field_value OWS* + : field_name ':' ows field_value ows ; /* - field-name = token + field-name = token */ field_name : token @@ -135,319 +126,307 @@ token ; /* - field-value = ( field-content / obs-fold ) + tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / "^" / "_" / "`" / + "|" / "~" / DIGIT / ALPHA + */ +tchar + : EXCLAMATION_MARK + | DOLLAR_SIGN + | HASHTAG + | PERCENT + | AMPERSAND + | SQUOTE + | STAR + | PLUS + | MINUS + | DOT + | CARET + | UNDERSCORE + | BACK_QUOTE + | VBAR + | TILDE + | DIGIT + | HEX_LETTER + | ALPHA + ; + +/* + field-value = *( field-content / obs-fold ) */ field_value - : (field_content | obs_fold)+ + : (field_content | obs_fold)* ; /* - field-content = field-vchar [ 1*( SP / HTAB )  field-vchar ] + field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] */ field_content - : field_vchar ((SP | HTAB)+ field_vchar)? + : field_vchar ((SP | HTAB)+ field_vchar)* ; /* - field-vchar = VCHAR / obs-text + OWS = *( SP / HTAB ) ; optional whitespace + */ +ows : (SP | HTAB)*; + +/* + field-vchar = VCHAR / obs-text */ field_vchar - : vCHAR + : vchar | obs_text ; /* - obs-text = %x80-FF + obs-text = %x80-FF */ obs_text : OBS_TEXT ; /* - obs-fold = CRLF  1*( SP / HTAB ) ; see RFC 7230 – Section 3.2.4 + obs-fold = CRLF 1*( SP / HTAB ) ; see RFC 7230 – Section 3.2.4 */ obs_fold : CRLF (SP | HTAB)+ ; /* - message-body = OCTET - */ -//message_body: OCTET*; - -/* - SP = %x20 ; space - */ -SP - : ' ' - ; - -/* - pchar = unreserved / pct‑encoded / sub‑delims / ":" / "@" + pchar = unreserved / pct‑encoded / sub‑delims / ":" / "@" */ pchar : unreserved - | Pct_encoded + | pct_encoded | sub_delims - | Colon - | At + | hexdig + | COLON + | AT ; /* - unreserved = ALPHA /  DIGIT /  "-" /  "." /  "_" /  "~" + pct-encoded = "%" HEXDIG HEXDIG */ -unreserved - : ALPHA - | DIGIT - | Minus - | Dot - | Underscore - | Tilde +pct_encoded + : PERCENT hexdig hexdig ; /* - ALPHA = %x41‑5A /  %x61‑7A ; A‑Z / a‑z - */ -ALPHA - : [A-Za-z] + HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" +*/ +hexdig + : DIGIT + | HEX_LETTER ; /* - DIGIT = %x30‑39 ; 0-9 + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" */ -DIGIT - : [0-9] +unreserved + : ALPHA + | DIGIT + | MINUS + | DOT + | UNDERSCORE + | TILDE ; /* - pct-encoded = "%"  HEXDIG  HEXDIG + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" */ -Pct_encoded - : Percent HEXDIG HEXDIG +sub_delims + : EXCLAMATION_MARK + | DOLLAR_SIGN + | AMPERSAND + | SQUOTE + | LCOLUMN + | RCOLUMN + | STAR + | PLUS + | PERIOD + | SEMICOLON + | EQUALS ; /* - HEXDIG = DIGIT /  "A" /  "B" /  "C" /  "D" /  "E" /  "F" - */ -HEXDIG - : DIGIT - | 'A' - | 'B' - | 'C' - | 'D' - | 'E' - | 'F' +VCHAR = %x21-7E ; visible (printing) characters +*/ +vchar + : LCOLUMN + | RCOLUMN + | SEMICOLON + | EQUALS + | PERIOD + | MINUS + | DOT + | UNDERSCORE + | TILDE + | QUESTION_MARK + | SLASH + | EXCLAMATION_MARK + | COLON + | AT + | DOLLAR_SIGN + | HASHTAG + | AMPERSAND + | PERCENT + | SQUOTE + | STAR + | PLUS + | CARET + | BACK_QUOTE + | VBAR + | HEX_LETTER + | ALPHA + | DIGIT + | VCHAR ; -/* - sub-delims = "!" /  "$" /  "&" /  "'" /  "(" /  ")" /  "*" /  "+" /  "," /  ";" /  "=" - */ -sub_delims - : ExclamationMark - | DollarSign - | Ampersand - | SQuote - | LColumn - | RColumn - | Star - | Plus - | SemiColon - | Period - | Equals - ; - -LColumn +LCOLUMN : '(' ; -RColumn +RCOLUMN : ')' ; -SemiColon +SEMICOLON : ';' ; -Equals +EQUALS : '=' ; -Period +PERIOD : ',' ; -/* - CRLF = CR  LF ; Internet standard newline - */ -CRLF - : '\n' - ; - -/* - tchar = "!" /  "#" /  "$" /  "%" /  "&" /  "'" /  "*" /  "+" /  "-" /  "." /  "^" /  "_" /  "`" /  - "|" /  "~" /  DIGIT /  ALPHA - */ -tchar - : ExclamationMark - | DollarSign - | Hashtag - | Percent - | Ampersand - | SQuote - | Star - | Plus - | Minus - | Dot - | Caret - | Underscore - | BackQuote - | VBar - | Tilde - | DIGIT - | ALPHA - ; - -Minus +MINUS : '-' ; -Dot +DOT : '.' ; -Underscore +UNDERSCORE : '_' ; -Tilde +TILDE : '~' ; -QuestionMark +QUESTION_MARK : '?' ; -Slash +SLASH : '/' ; -ExclamationMark +EXCLAMATION_MARK : '!' ; -Colon +COLON : ':' ; -At +AT : '@' ; -DollarSign +DOLLAR_SIGN : '$' ; -Hashtag +HASHTAG : '#' ; -Ampersand +AMPERSAND : '&' ; -Percent +PERCENT : '%' ; -SQuote +SQUOTE : '\'' ; -Star +STAR : '*' ; -Plus +PLUS : '+' ; -Caret +CARET : '^' ; -BackQuote +BACK_QUOTE : '`' ; -VBar +VBAR : '|' ; /* - OWS = ( SP / HTAB ) ; optional whitespace + DIGIT = %x30‑39 ; 0-9 */ -OWS - : SP - | HTAB +DIGIT + : [0-9] ; -/* - HTAB = %x09 ; horizontal tab + /* + HEX_LETTER = "A" / "B" / "C" / "D" / "E" / "F" */ -HTAB - : '\t' - ; +HEX_LETTER: [A-F]; /* - VCHAR = %x21-7E ; visible (printing) characters + ALPHA = %x41‑5A / %x61‑7A ; A‑Z / a‑z */ -vCHAR - : ALPHA - | DIGIT - | VCHAR +ALPHA + : [A-Za-z] ; +/* + VCHAR = %x21-7E ; visible (printing) characters + */ VCHAR - : ExclamationMark - | '"' - | Hashtag - | DollarSign - | Percent - | Ampersand - | SQuote - | LColumn - | RColumn - | RColumn - | Star - | Plus - | Period - | Minus - | Dot - | Slash - | Colon - | SemiColon - | '<' - | Equals - | '>' - | QuestionMark - | At - | '[' - | '\\' - | Caret - | Underscore - | ']' - | BackQuote - | '{' - | '}' - | VBar - | Tilde + : '\u0021' .. '\u007e' ; +/* + OBS_TEXT = %x80-FF +*/ OBS_TEXT : '\u0080' ..'\u00ff' ; /* - OCTET = %x00-FF ; 8 bits of data + SP = %x20 ; space + */ +SP + : ' ' + ; + +/* + HTAB = %x09 ; horizontal tab */ -//OCTET: '\u0000' .. '\u001f' | VCHAR | '\u007f' .. '\u00ff' ; \ No newline at end of file +HTAB + : '\t' + ; + +/* + CRLF= CR LF ; Internet standard newline + */ +CRLF + : '\r\n' | '\n' + ; diff --git a/http/pom.xml b/http/pom.xml index 0db7dbf7ae..b2aca2d6cd 100644 --- a/http/pom.xml +++ b/http/pom.xml @@ -40,11 +40,13 @@ false http_message http + .http examples/ + test-http test diff --git a/http/testHeader.http b/http/testHeader.http deleted file mode 100644 index f175e96007..0000000000 --- a/http/testHeader.http +++ /dev/null @@ -1 +0,0 @@ -Host: www.google.com.hk