cow code analysis
COW is an HTTP proxy that simplifies bypassing the Great Firewall. It tries to automatically identify blocked websites and only uses a parent proxy for those sites.
Features
- As an HTTP proxy, can be used by mobile devices
- Supports HTTP, SOCKS5, shadowsocks and COW itself as parent proxy
- Supports simple load balancing between multiple parent proxies
- Automatically identify blocked websites, only use parent proxy for those sites
- Generate and serve PAC file for browser to bypass COW for best performance
- Contain domains that can be directly accessed (recorded according to your visit history)
Visited site recording
COW records all visited hosts and their visit counts in stat
(which is a JSON file) in the same directory as the config file.
- For unknown site, first try direct access, use parent proxy upon failure. After 2 minutes, try direct access again
- Built-in list of commonly blocked sites, in order to reduce the time needed to discover blockage and switch to the parent proxy
- Hosts will be put into PAC after a few times of successful direct visit
- Hosts will use parent proxy if direct access failed for a few times
- To avoid mistakes, will try direct access with some probability
- Host will be deleted if not visited for a few days
- Hosts under builtin/manually specified blocked and direct domains will not appear in
stat
How does COW detect blocked sites
Upon the following error, one domain is considered to be blocked
- Server connection reset
- Connection to server timeout
- Read from server timeout
COW will retry the HTTP request upon these errors. But if some data has already been sent back to the client, the connection with the client will be dropped to signal the error.
Server connection reset is usually reliable in detecting blocked sites, but timeout is not. COW tries to estimate the timeout value every 30 seconds, in order to avoid considering normal sites as blocked when network conditions are bad. Reverting to direct access two minutes after the first blockage is also meant to avoid mistakes.
If automatic timeout retry causes problems for you, try changing readTimeout
and dialTimeout
in configuration.
Example Configuration
# Line starting with # is comment and will be ignored
# Local proxy listen address
listen = http://127.0.0.1:7777
# SOCKS5 parent proxy
proxy = socks5://127.0.0.1:1080
# HTTP parent proxy
proxy = http://127.0.0.1:8080
proxy = http://user:password@127.0.0.1:8080
# shadowsocks parent proxy
proxy = ss://aes-128-cfb:password@1.2.3.4:8388
# cow parent proxy
proxy = cow://aes-128-cfb:password@1.2.3.4:8388
Implementation Design
COW uses separate goroutines to read client requests and server responses.
- For each client, COW will create one goroutine to
- accept client request (read from client connection)
- create connection if not exist
- send request to the server (write to server connection)
- read response from the web server (read from server connection)
- send response back to the client (write to client connection)
// main is the program entry point. It parses the command line, loads the
// configuration, starts one Serve goroutine per entry in listenProxy, waits
// for all of them to finish and, when relaunch is set, replaces the current
// process image with a fresh copy of the program.
func main() {
	quit = make(chan struct{})

	// The command-line options are handed to parseConfig together with the
	// config file path (per the original note: so that they can override
	// options found in the config file).
	opts := parseCmdLineConfig()
	if opts.PrintVer {
		printVersion()
		os.Exit(0)
	}

	// Initializes listenProxy and backupParentPool (as noted by the original
	// author; the callee bodies are not visible here).
	parseConfig(opts.RcFile, opts)
	initSelfListenAddr()
	initParentPool()

	if cores := config.Core; cores > 0 {
		runtime.GOMAXPROCS(cores)
	}

	// Handle signals for graceful termination.
	go sigHandler()

	// One Serve goroutine per listening proxy; each calls Done on exit.
	var listeners sync.WaitGroup
	for _, lp := range listenProxy {
		listeners.Add(1)
		go lp.Serve(&listeners, quit)
	}
	listeners.Wait()

	if relaunch {
		info.Println("Relunching cow...")
		// Re-exec ourselves: on success syscall.Exec never returns.
		exePath, lookErr := lookPath()
		if lookErr != nil {
			errl.Println(lookErr)
			return
		}
		if execErr := syscall.Exec(exePath, os.Args, os.Environ()); execErr != nil {
			errl.Println(execErr)
		}
	}
	debug.Println("the main process is , exiting...")
}
// Serve listens on hp.addr and starts one goroutine per accepted client
// connection. It returns when listening fails or after quit is signalled,
// and calls wg.Done on the way out in either case.
//
// wg is the wait group the caller uses to wait for all listeners.
// quit is received from once; when that receive completes the listener is
// closed and the accept loop stops.
func (hp *httpProxy) Serve(wg *sync.WaitGroup, quit <-chan struct{}) {
	defer wg.Done()

	ln, err := net.Listen("tcp", hp.addr)
	if err != nil {
		fmt.Println("listen http failed:", err)
		return
	}

	// closing is owned by this call and is closed exactly once, before the
	// listener is closed. The accept loop polls it instead of sharing a plain
	// bool with the watcher goroutine, which would be a data race.
	closing := make(chan struct{})
	go func() {
		<-quit
		close(closing)
		ln.Close()
	}()
	shuttingDown := func() bool {
		select {
		case <-closing:
			return true
		default:
			return false
		}
	}

	// The error is deliberately ignored: on failure host is "" and the
	// placeholder form of the PAC URL below is used.
	host, _, _ := net.SplitHostPort(hp.addr)
	var pacURL string
	switch {
	case host == "" || host == "0.0.0.0":
		// Listening on all interfaces: the user has to fill in the real IP.
		pacURL = fmt.Sprintf("http://<hostip>:%s/pac", hp.port)
	case hp.addrInPAC == "":
		pacURL = fmt.Sprintf("http://%s/pac", hp.addr)
	default:
		pacURL = fmt.Sprintf("http://%s/pac", hp.addrInPAC)
	}
	info.Printf("COW %s listen http %s, PAC url %s\n", version, hp.addr, pacURL)

	for {
		conn, err := ln.Accept()
		if shuttingDown() {
			// A connection may have been accepted just as shutdown began;
			// close it instead of leaking it.
			if err == nil {
				conn.Close()
			}
			debug.Println("exiting the http listner")
			break
		}
		if err != nil {
			errl.Printf("http proxy(%s) accept %v\n", ln.Addr(), err)
			if isErrTooManyOpenFd(err) {
				connPool.CloseAll()
			}
			// Brief pause so a persistent accept error does not spin the CPU.
			time.Sleep(time.Millisecond)
			continue
		}
		// Each client connection is served by its own goroutine.
		c := newClientConn(conn, hp)
		go c.serve()
	}
}
// serve is the per-client goroutine body. It loops: parse one request from
// the client, obtain a server connection for it, forward the request and the
// response via doRequest, then either return the server connection to
// connPool or close it. The loop ends when the client connection cannot be
// reused; the client connection and the request buffer are released on return.
func (c *clientConn) serve() {
	var r Request
	var rp Response
	var sv *serverConn
	var err error

	defer func() {
		r.releaseBuf()
		c.Close()
	}()

	// Refer to implementation.md for the design choices on parsing the request
	// and response.
	for {
		// Parse the next request from the client.
		if err = parseRequest(c, &r); err != nil {
			debug.Printf("cli(%s) parse request %v\n", c.RemoteAddr(), err)
			if err == io.EOF || isErrConnReset(err) {
				// Client went away; nothing to report.
				return
			}
			if err != errClientTimeout {
				// Malformed request: 400 is the status code that matches the
				// "Bad request" reason phrase.
				sendErrorPage(c, "400 Bad request", "Bad request", err.Error())
				return
			}
			sendErrorPage(c, statusRequestTimeout, statusRequestTimeout,
				"Your browser didn't send a complete request in time.")
			return
		}

	retry:
		// Build (or reuse) a server connection based on the parsed request.
		if sv, err = c.getServerConn(&r); err != nil {
			if debug {
				debug.Printf("cli(%s) failed to get server conn %v\n", c.RemoteAddr(), &r)
			}
			// Failed connection will send error page back to the client.
			// For CONNECT, the client read buffer is released in copyClient2Server,
			// so can't go back to getRequest.
			if err == errPageSent && !r.isConnect {
				if r.hasBody() {
					// Drain the request body so the next request can be parsed.
					debug.Printf("cli(%s) skip request body %v\n", c.RemoteAddr(), &r)
					sendBody(SinkWriter{}, c.bufRd, int(r.ContLen), r.Chunking)
				}
				continue
			}
			return
		}

		// Forward request from client to server and response from server to client.
		if err = sv.doRequest(c, &r, &rp); err != nil {
			sv.Close()
			if c.shouldRetry(&r, sv, err) {
				goto retry
			} else if err == errPageSent && (!r.hasBody() || r.hasSent()) {
				// Can only continue if request has no body, or request body
				// has been read.
				continue
			}
			return
		}

		// Put server connection to pool, so other clients can use it.
		_, isCowConn := sv.Conn.(cowConn)
		if rp.ConnectionKeepAlive || isCowConn {
			if debug {
				debug.Printf("cli(%s) connPool put %s", c.RemoteAddr(), sv.hostPort)
			}
			// If the server connection is not going to be used soon,
			// release buffer before putting back to pool can save memory.
			sv.releaseBuf()
			connPool.Put(sv)
		} else {
			if debug {
				debug.Printf("cli(%s) server %s close conn\n", c.RemoteAddr(), sv.hostPort)
			}
			sv.Close()
		}

		// The client did not ask for keep-alive: stop serving this connection.
		if !r.ConnectionKeepAlive {
			if debug {
				debug.Printf("cli(%s) close connection\n", c.RemoteAddr())
			}
			return
		}
	}
}
// connect opens a connection for request r, choosing between the parent
// proxy and a direct connection:
//
//   - config.AlwaysProxy: parent proxy only.
//   - site treated as blocked and a parent proxy exists: parent proxy first,
//     direct connection as fallback.
//   - otherwise: direct connection first, parent proxy as fallback; a
//     successful fallback reports the request via handleBlockedRequest.
//
// When every attempt fails, an error page is sent to the client and
// (nil, errPageSent) is returned.
func (c *clientConn) connect(r *Request, siteInfo *VisitCnt) (srvconn net.Conn, err error) {
	var errMsg string

	switch {
	case config.AlwaysProxy:
		if srvconn, err = parentProxy.connect(r.URL); err == nil {
			return
		}
		errMsg = genErrMsg(r, nil, "Parent proxy connection failed, always use parent proxy.")

	case siteInfo.AsBlocked() && !parentProxy.empty():
		// In case of connection error to the parent proxy, fall back to a
		// direct connection.
		if srvconn, err = parentProxy.connect(r.URL); err == nil {
			return
		}
		if srvconn, err = connectDirect(r.URL, siteInfo); err == nil {
			return
		}
		errMsg = genErrMsg(r, nil, "Parent proxy and direct connection failed, maybe blocked site.")

	default:
		// In case of error on direct connection, try the parent proxy.
		if srvconn, err = connectDirect(r.URL, siteInfo); err == nil {
			return
		}
		// net.Dial does two things: DNS lookup and TCP connection.
		// GFW may cause failure here: make it time out or reset connection.
		// RST during TCP handshake is valid and would return as connection
		// refused error. The original author's observation is that GFW does
		// not use RST to stop the TCP handshake.
		// To simplify things and guard against that observation being wrong,
		// always try the parent proxy in case of a Dial error.
		var proxyErr error
		if srvconn, proxyErr = parentProxy.connect(r.URL); proxyErr == nil {
			// Flag the host as blocked, passing along the direct-dial error.
			c.handleBlockedRequest(r, err)
			if debug {
				debug.Printf("cli(%s) direct connection failed, use parent proxy for %v\n",
					c.RemoteAddr(), r)
			}
			return srvconn, nil
		}
		errMsg = genErrMsg(r, nil,
			"Direct and parent proxy connection failed, maybe blocked site.")
	}

	// Every path reaching here has a non-nil err from the last failed attempt
	// that was assigned to it.
	sendErrorPage(c, "504 Connection failed", err.Error(), errMsg)
	return nil, errPageSent
}