You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

560 lines
17 KiB

// -*-go-*-
//
// Ragel SIP Message Parser
//
// This file is compiled into Go code by the Ragel State Machine Compiler for
// the purpose of converting SIP messages into a Msg data structure. This
// machine works in tandem with the Ragel machine defined in uri_parse.rl.
//
// Perhaps it would have been better if the authors of this protocol had chosen
// to use a binary serialization format like protocol buffers. But instead they
// chose to create a plaintext protocol that looks similar to HTTP requests,
// but is phenomenally more complicated.
//
// SIP messages are quite insane.
//
// o Whitespace can be used liberally in a variety of different ways.
//
// - Via host:port can have whitespace, e.g. "host \t: port"
//
// o UTF-8 is supported in some places but not others.
//
// o Headers can span multiple lines.
//
// o Header values can contain comments, e.g. Message: lol (i'm (hidden))
//
// o Header names are case-insensitive and have shorthand notation.
//
// o There are ~50 standard headers, many of which have custom parsing rules.
//
// o URIs can have ;params;like=this
//
// - Params can belong either to a URI or Addr object, e.g. <sip:uri;param>
// cf. <sip:uri>;param
//
// - Addresses may omit angle brackets, in which case params belong to the
// Addr object.
//
// - URI params ;are=escaped%20like%22this but params belonging to Addr
// ;are="escaped like\"this"
//
// - Backslash escaping is not like C, e.g. \t\n -> tn
//
// - Address display name can have whitespace without quotes, which is
// collapsed. Quoted form is not collapsed.
//
// o Via and address headers can be repeated in two ways: repeating the
// header, using commas within a single header, or both.
//
// See: http://www.colm.net/files/ragel/ragel-guide-6.9.pdf
// See: http://zedshaw.com/archive/ragel-state-charts/
package sip
import (
"errors"
"fmt"
"github.com/jart/gosip/sdp"
)
%% machine msg;
%% write data;
// ParseMsg parses a SIP message held in a string. An empty string is
// rejected with an error; any other input is converted to a byte slice and
// handed to ParseMsgBytes, whose result is returned unchanged.
func ParseMsg(s string) (msg *Msg, err error) {
	if len(s) == 0 {
		err = errors.New("Empty SIP message")
		return nil, err
	}
	msg, err = ParseMsgBytes([]byte(s))
	return msg, err
}
// ParseMsgBytes turns a SIP message byte slice into a data structure.
//
// A nil slice yields (nil, nil). Otherwise the start line and headers are
// scanned by the Ragel machine embedded below. When Content-Length is
// positive, the bytes that follow the header section become the payload:
// they are parsed as SDP when the Content-Type equals sdp.ContentType, and
// wrapped in a MiscPayload otherwise.
//
// An error is returned when the machine stops before reaching a final state,
// when a request URI, address header or SDP body fails to parse, or when
// Content-Length does not equal the number of bytes left after the headers.
func ParseMsgBytes(data []byte) (msg *Msg, err error) {
	if data == nil {
		return nil, nil
	}
	msg = new(Msg)
	viap := &msg.Via // where the next completed Via is linked in
	cs := 0
	p := 0
	pe := len(data)
	eof := len(data)
	line := 1  // current line number, used only for error reporting
	linep := 0 // offset just past the most recent line feed
	buf := make([]byte, len(data)) // scratch space for unescaped text
	amt := 0   // number of bytes currently stored in buf
	mark := 0  // offset in data where the current token began
	clen := 0  // value of the Content-Length header
	ctype := "" // value of the Content-Type header
	var name string   // most recently parsed header or parameter name
	var hex byte      // accumulator for a %XX escape
	var value *string // destination for a plain string header value
	var addr **Addr   // destination for an address header value
	var via *Via      // Via entry currently being filled in

	%%{
		# Generic scanner actions.
		action hold {
			fhold;
		}

		action break {
			fbreak;
		}

		action line {
			line++
			linep = p + 1
		}

		action mark {
			mark = p
		}

		action start {
			amt = 0
		}

		action append {
			buf[amt] = fc
			amt++
		}

		action space {
			buf[amt] = ' '
			amt++
		}

		action collapse {
			amt = appendCollapse(buf, amt, fc)
		}

		action hexHi {
			hex = unhex(fc) * 16
		}

		action hexLo {
			hex += unhex(fc)
			buf[amt] = hex
			amt++
		}

		action lower {
			amt = appendLower(buf, amt, fc)
		}

		# Start line actions.
		action Method {
			msg.Method = string(data[mark:p])
		}

		action VersionMajor {
			msg.VersionMajor = msg.VersionMajor * 10 + (fc - 0x30)
		}

		action VersionMinor {
			msg.VersionMinor = msg.VersionMinor * 10 + (fc - 0x30)
		}

		action RequestURI {
			msg.Request, err = ParseURIBytes(data[mark:p])
			if err != nil { return nil, err }
		}

		action StatusCode {
			msg.Status = msg.Status * 10 + (int(fc) - 0x30)
		}

		action ReasonPhrase {
			msg.Phrase = string(buf[0:amt])
		}

		# Via actions. Completed entries are chained through viap.
		action Via {
			*viap = via
			viap = &via.Next
			// via = nil
		}

		action ViaProtocol {
			via.Protocol = string(data[mark:p])
		}

		action ViaVersion {
			via.Version = string(data[mark:p])
		}

		action ViaTransport {
			via.Transport = string(data[mark:p])
		}

		action ViaHost {
			via.Host = string(data[mark:p])
		}

		action ViaPort {
			via.Port = via.Port * 10 + (uint16(fc) - 0x30)
		}

		action ViaParam {
			if via.Params == nil {
				via.Params = Params{}
			}
			via.Params[name] = string(buf[0:amt])
		}

		# Jumps between the named machines.
		action goto_header {
			fgoto header;
		}

		action goto_value {
			fgoto value;
		}

		action goto_via {
			via = new(Via)
			fgoto via;
		}

		action goto_via_param {
			amt = 0 // Needed so ViaParam action works when there's no value.
			fgoto via_param;
		}

		action gxh {
			fhold;
			fgoto xheader;
		}

		action name {
			name = string(data[mark:p])
		}

		# Stores a generic header value. This fires on the line feed of the
		# terminating CRLF, so p - 1 is the carriage return and is excluded.
		action value {{
			b := data[mark:p - 1]
			if value != nil {
				*value = string(b)
			} else if addr != nil {
				*addr, err = ParseAddrBytes(b, *addr)
				if err != nil { return nil, err }
			} else {
				if msg.Headers == nil {
					msg.Headers = Headers{}
				}
				msg.Headers[name] = string(b)
			}
		}}

		# NOTE(review): new_addr, addr_display, addr_uri and addr_param are not
		# referenced by any machine definition in this file. They also rely on
		# the strings package, which this file does not import, and treat addr
		# as a single pointer. Confirm they are dead before wiring them in.
		action new_addr {
			addr = new(Addr)
		}

		action addr_display {
			addr.Display = strings.TrimRight(string(buf[0:amt]), " \t\r\n")
		}

		action addr_uri {
			addr.Uri, err = ParseURIBytes(data[mark:p])
			if err != nil { return nil, err }
		}

		action addr_param {
			if addr.Params == nil {
				addr.Params = Params{}
			}
			addr.Params[name] = string(buf[0:amt])
		}

		# Actions for headers that have their own parsing rules.
		action CallID {
			msg.CallID = string(data[mark:p])
		}

		action ContentLength {
			clen = clen * 10 + (int(fc) - 0x30)
		}

		action ContentType {
			ctype = string(data[mark:p])
		}

		action CSeq {
			msg.CSeq = msg.CSeq * 10 + (int(fc) - 0x30)
		}

		action CSeqMethod {
			msg.CSeqMethod = string(data[mark:p])
		}

		action Expires {
			msg.Expires = msg.Expires * 10 + (int(fc) - 0x30)
		}

		action MaxForwards {
			msg.MaxForwards = msg.MaxForwards * 10 + (int(fc) - 0x30)
		}

		action MinExpires {
			msg.MinExpires = msg.MinExpires * 10 + (int(fc) - 0x30)
		}

		action lookAheadWSP { lookAheadWSP(data, p, pe) }

		# https://tools.ietf.org/html/rfc2234
		SP              = " ";
		HTAB            = "\t";
		CR              = "\r";
		LF              = "\n" @line;
		DQUOTE          = "\"";
		CRLF            = CR LF;
		WSP             = SP | HTAB;
		LWS             = ( WSP* ( CR when lookAheadWSP ) LF )? WSP+;
		SWS             = LWS?;

		LWSCRLF_append  = ( CR when lookAheadWSP ) @append LF @append;
		LWS_append      = ( WSP* @append LWSCRLF_append )? WSP+ @append;

		UTF8_CONT       = 0x80..0xBF @append;
		UTF8_NONASCII   = 0xC0..0xDF @append UTF8_CONT {1}
		                | 0xE0..0xEF @append UTF8_CONT {2}
		                | 0xF0..0xF7 @append UTF8_CONT {3}
		                | 0xF8..0xFb @append UTF8_CONT {4}
		                | 0xFC..0xFD @append UTF8_CONT {5};
		UTF8            = 0x21..0x7F @append | UTF8_NONASCII;

		mUTF8_CONT      = 0x80..0xBF;
		mUTF8_NONASCII  = 0xC0..0xDF mUTF8_CONT {1}
		                | 0xE0..0xEF mUTF8_CONT {2}
		                | 0xF0..0xF7 mUTF8_CONT {3}
		                | 0xF8..0xFb mUTF8_CONT {4}
		                | 0xFC..0xFD mUTF8_CONT {5};
		mUTF8           = 0x21..0x7F | mUTF8_NONASCII;

		# https://tools.ietf.org/html/rfc3261#section-25.1
		reserved        = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," ;
		mark            = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" ;
		unreserved      = alnum | mark ;
		tokenc          = alnum | "-" | "." | "!" | "%" | "*" | "_" | "+" | "`"
		                | "'" | "~" ;
		separators      = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\"
		                | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | SP
		                | HTAB ;
		wordc           = alnum | "-" | "." | "!" | "%" | "*" | "_" | "+" | "`"
		                | "'" | "~" | "(" | ")" | "<" | ">" | ":" | "\\" | "\""
		                | "/" | "[" | "]" | "?" | "{" | "}" ;
		schmchars       = alnum | "+" | "-" | "." ;
		word            = wordc+;
		STAR            = SWS "*" SWS;
		SLASH           = SWS "/" SWS;
		EQUAL           = SWS "=" SWS;
		LPAREN          = SWS "(" SWS;
		RPAREN          = SWS ")" SWS;
		RAQUOT          = ">" SWS;
		LAQUOT          = SWS "<";
		COMMA           = SWS "," SWS;
		SEMI            = SWS ";" SWS;
		COLON           = SWS ":" SWS;
		HCOLON          = WSP* ":" SWS;
		LDQUOT          = SWS "\"";
		RDQUOT          = "\"" SWS;
		escaped         = "%" ( xdigit @hexHi ) ( xdigit @hexLo ) ;
		ipv4            = digit | "." ;
		ipv6            = xdigit | "." | ":" ;
		hostname        = alpha | digit | "-" | "." ;
		uric            = reserved | unreserved | "%" | "[" | "]";
		uric_no_slash   = unreserved | escaped | ";" | "?" | ":" | "@" | "&" | "="
		                | "+" | "$" | "," ;
		uri             = alpha schmchars* ":" uric+;
		token           = tokenc+;
		tokenhost       = ( tokenc | "[" | "]" | ":" )+;
		reasonc         = UTF8_NONASCII | ( reserved | unreserved | SP | HTAB ) @append;
		reasonmc        = escaped | reasonc;
		cid             = word ( "@" word )?;
		Method          = token >mark %Method;
		SIPVersionNo    = digit+ @VersionMajor "." digit+ @VersionMinor;
		RequestURI      = ^SP+ >mark %RequestURI;
		StatusCode      = ( digit @StatusCode ) {3};
		ReasonPhrase    = reasonmc+ >start %ReasonPhrase;
		hval            = ( mUTF8 | LWS )* >mark;

		# Quoted strings can have just about anything, including backslash escapes,
		# which aren't quite as fancy as the ones you'd see in programming.
		qdtextc         = 0x21 | 0x23..0x5B | 0x5D..0x7E;
		qdtext          = UTF8_NONASCII | LWS_append | qdtextc @append;
		quoted_pair     = "\\" ( 0x00..0x09 | 0x0B..0x0C | 0x0E..0x7F ) @append;
		quoted_content  = ( qdtext | quoted_pair )* >start;
		quoted_string   = SWS DQUOTE quoted_content DQUOTE;

		# Via Parsing
		#
		# Parsing these is kind of difficult because infinite whitespace is allowed
		# between colons, semicolons, commas, and don't forget that lines can
		# continue. So we're going to break things down into four separate machines
		# that jump between each other.
		ViaProtocol     = token >mark %ViaProtocol;
		ViaVersion      = token >mark %ViaVersion;
		ViaTransport    = token >mark %ViaTransport;
		ViaSent         = ViaProtocol SLASH ViaVersion SLASH ViaTransport;
		ViaHostIPv4     = ( digit | "." )+ >mark %ViaHost;
		ViaHostIPv6     = "[" ( xdigit | "." | ":" )+ >mark %ViaHost "]";
		ViaHostName     = ( alnum | "." | "-" )+ >mark %ViaHost;
		ViaHost         = ViaHostIPv4 | ViaHostIPv6 | ViaHostName;
		ViaPort         = digit+ @ViaPort;
		ViaParamName    = token >mark %name;
		ViaParamContent = tokenhost >start @append;
		ViaParamValue   = ViaParamContent | quoted_string;
		via_param_end   = CRLF @ViaParam @Via @goto_header
		                | SEMI <: any @hold @ViaParam @goto_via_param
		                | COMMA <: any @hold @ViaParam @Via @goto_via;
		via_param      := ViaParamName (EQUAL ViaParamValue)? via_param_end;
		via_end         = CRLF @Via @goto_header
		                | SEMI <: any @hold @goto_via_param
		                | COMMA <: any @hold @Via @goto_via;
		via            := ViaSent LWS ViaHost (COLON ViaPort)? via_end;

		# Address Header Name Definitions
		#
		# These headers set the addr pointer to tell the 'value' machine where to
		# store the value after using ParseAddrBytes().
		aname = ("Contact"i | "m"i) %{addr=&msg.Contact}
		      | ("From"i | "f"i) %{addr=&msg.From}
		      | "P-Asserted-Identity"i %{addr=&msg.PAssertedIdentity}
		      | "Record-Route"i %{addr=&msg.RecordRoute}
		      | "Remote-Party-ID"i %{addr=&msg.RemotePartyID}
		      | "Route"i %{addr=&msg.Route}
		      | ("To"i | "t"i) %{addr=&msg.To}
		      ;

		# String Header Name Definitions
		#
		# These headers set the value pointer to tell the 'value' machine where to
		# store the resulting token string.
		#
		# Allow has no compact form. The single letter u belongs to Allow-Events
		# alone, so it must not be listed on both alternatives.
		sname = "Accept"i %{value=&msg.Accept}
		      | ("Accept-Contact"i | "a"i) %{value=&msg.AcceptContact}
		      | "Accept-Encoding"i %{value=&msg.AcceptEncoding}
		      | "Accept-Language"i %{value=&msg.AcceptLanguage}
		      | "Allow"i %{value=&msg.Allow}
		      | ("Allow-Events"i | "u"i) %{value=&msg.AllowEvents}
		      | "Alert-Info"i %{value=&msg.AlertInfo}
		      | "Authentication-Info"i %{value=&msg.AuthenticationInfo}
		      | "Authorization"i %{value=&msg.Authorization}
		      | "Content-Disposition"i %{value=&msg.ContentDisposition}
		      | "Content-Language"i %{value=&msg.ContentLanguage}
		      | ("Content-Encoding"i | "e"i) %{value=&msg.ContentEncoding}
		      | "Call-Info"i %{value=&msg.CallInfo}
		      | "Date"i %{value=&msg.Date}
		      | "Error-Info"i %{value=&msg.ErrorInfo}
		      | ("Event"i | "o"i) %{value=&msg.Event}
		      | "In-Reply-To"i %{value=&msg.InReplyTo}
		      | "Reply-To"i %{value=&msg.ReplyTo}
		      | "MIME-Version"i %{value=&msg.MIMEVersion}
		      | "Organization"i %{value=&msg.Organization}
		      | "Priority"i %{value=&msg.Priority}
		      | "Proxy-Authenticate"i %{value=&msg.ProxyAuthenticate}
		      | "Proxy-Authorization"i %{value=&msg.ProxyAuthorization}
		      | "Proxy-Require"i %{value=&msg.ProxyRequire}
		      | ("Refer-To"i | "r"i) %{value=&msg.ReferTo}
		      | ("Referred-By"i | "b"i) %{value=&msg.ReferredBy}
		      | "Require"i %{value=&msg.Require}
		      | "Retry-After"i %{value=&msg.RetryAfter}
		      | "Server"i %{value=&msg.Server}
		      | ("Subject"i | "s"i) %{value=&msg.Subject}
		      | ("Supported"i | "k"i) %{value=&msg.Supported}
		      | "Timestamp"i %{value=&msg.Timestamp}
		      | "Unsupported"i %{value=&msg.Unsupported}
		      | "User-Agent"i %{value=&msg.UserAgent}
		      | "Warning"i %{value=&msg.Warning}
		      | "WWW-Authenticate"i %{value=&msg.WWWAuthenticate}
		      ;

		# Custom Header Definitions
		#
		# These headers do not jump to the 'value' machine, but instead specify
		# their own special type of parsing.
		#
		# The single letter l is the compact form of Content-Length only. Expires,
		# Max-Forwards and Min-Expires have no compact form; listing l on them
		# would make a compact Content-Length header overwrite those fields too.
		cheader = ("Call-ID"i | "i"i) $!gxh HCOLON cid >mark %CallID
		        | ("Content-Length"i | "l"i) $!gxh HCOLON digit+ >{clen=0} @ContentLength
		        | ("Content-Type"i | "c"i) $!gxh HCOLON <: hval %ContentType
		        | "CSeq"i $!gxh HCOLON (digit+ @CSeq) LWS token >mark %CSeqMethod
		        | "Expires"i $!gxh HCOLON digit+ >{msg.Expires=0} @Expires
		        | "Max-Forwards"i $!gxh HCOLON digit+ >{msg.MaxForwards=0} @MaxForwards
		        | "Min-Expires"i $!gxh HCOLON digit+ >{msg.MinExpires=0} @MinExpires
		        ;

		# Header Parsing
		#
		# The header machine parses a single header and then jumps to itself to
		# loop. When the final CRLF is observed, we then break out of the Ragel
		# parser and let the Go code handle payload extraction.
		#
		# Parsing standard header names is a prefix trie search in generated code.
		# Lookahead to set the mark on the header name. In order to support
		# extended headers, we'll use $!gxh to jump to the xheader machine when an
		# unrecognized character is detected in the header name.
		#
		# An independent machine has been created for generic header values, so
		# that it doesn't need to be duplicated for each leaf in the prefix
		# trie. When the value machine has finished reading a value, it'll be
		# parsed and stored based on whether the value/addr pointers are set.
		#
		# Header values can span multiple lines. Lookahead is used in the LWS
		# definition to check for whitespace at the start of the next line upon
		# encountering a line feed character, in order to determine if a line
		# continuation is present.
		#
		# In order to concatenate across machines, we use lookahead in conjunction
		# with the left-guarded concatenation operator. This pattern is defined
		# as follows: `foo <: any @hold @goto_bar`.
		#
		# Header names are case insensitive. Each recognized header is assigned to
		# a specific field in the Msg data structure. Extended headers are stored
		# to a linked list data structure with the casing preserved. This is so
		# messages can be reproduced with roughly the same appearance. It is the
		# responsibility of the person using Msg.Headers to do case-insensitive
		# string comparisons.
		value   := hval <: CRLF @value @goto_header;
		xheader := token %name HCOLON <: any @{value=nil;addr=nil} @hold @goto_value;
		sheader  = cheader <: CRLF @goto_header
		         | aname $!gxh HCOLON <: any @{value=nil} @hold @goto_value
		         | sname $!gxh HCOLON <: any @{addr=nil} @hold @goto_value
		         | ("Via"i | "v"i) $!gxh HCOLON <: any @hold @goto_via;
		header  := CRLF @break
		         | tokenc @mark @hold sheader;

		# Start Line Parsing
		#
		# The Request and Response definitions are very straightforward, and the
		# main machine is the union of the two. Once the line feed character has
		# been observed, we then jump to the header machine.
		SIPVersion = "SIP/" SIPVersionNo;
		Request    = Method SP RequestURI SP SIPVersion CRLF @goto_header;
		Response   = SIPVersion SP StatusCode SP ReasonPhrase CRLF @goto_header;
		main      := Request | Response;

		write init;
		write exec;
	}%%

	// The machine must have stopped in a final state. Running out of input
	// is reported separately from stopping on an unexpected byte.
	if cs < msg_first_final {
		if p == pe {
			return nil, fmt.Errorf("Incomplete SIP message: %s", data)
		}
		return nil, fmt.Errorf("Error in SIP message at line %d offset %d:\n%s", line, p - linep, data)
	}

	// Everything from p onward is the body; it must match Content-Length.
	if clen > 0 {
		if remaining := len(data) - p; clen != remaining {
			return nil, fmt.Errorf("Content-Length incorrect: %d != %d", clen, remaining)
		}
		if ctype == sdp.ContentType {
			ms, err := sdp.Parse(string(data[p:]))
			if err != nil {
				return nil, err
			}
			msg.Payload = ms
		} else {
			msg.Payload = &MiscPayload{T: ctype, D: data[p:]}
		}
	}
	return msg, nil
}
// lookAheadWSP reports whether the byte two positions past p is a space or a
// horizontal tab. It returns false when that position is not strictly below
// pe. The LWS definitions call it with p on a carriage return, so the byte
// inspected is the first one after the following line feed.
func lookAheadWSP(data []byte, p, pe int) bool {
	next := p + 2
	if next >= pe {
		return false
	}
	switch data[next] {
	case ' ', '\t':
		return true
	default:
		return false
	}
}