Hi,
this is Ambrose at acli@ada.dhs.org.
I've hacked up a big mess (patch) against w3m-0.1.9 primarily
involving version 0 cookies. To my dismay, it seems that most
servers out there still want version 0 cookies and version 0
cookie handling behaviour, and w3m's cookie handling is too
strict for version 0, causing some sites (notably my.yahoo.co.jp)
not to work.
The patch mainly contains the following:
- Version 0 "tail-match" support (I still haven't checked all
version-0-related code, though). (Note that "tail-match" is
much sloppier than RFC 2109's "domain-match" and may cause
security violations. Some compile time and/or command line
option may be desired.) The cookie jar screen is slightly
modified to display the cookie version.
- Avoid calling gethostbyname() at all when a proxy is
used.
- Display a short reason when a cookie is rejected. Allow the
user to accept cookies that may cause security violations
(not enabled by default).
Plus some other perhaps not very useful stuff. Some of the mess
is due to the fact that I originally patched it for w3m-0.1.8,
but w3m-0.1.9 came out before I could submit the patch (^_^).
Sorry.
With the version 0 patch my.yahoo.co.jp is almost usable now,
other than the fact that it insists on checking the user-agent
string for the first login, and that it insists on having
Javascript to read mail. IMHO it is my.yahoo.co.jp that really
should be fixed...
BTW could someone mail me where I can find the i18n branch of w3m?
Thanks.
----------- 8< --------------- cut here -------------- 8< --------------
diff -ur w3m-0.1.9.dist/configure w3m-0.1.9.jis/configure
--- w3m-0.1.9.dist/configure Thu Apr 20 23:37:24 2000
+++ w3m-0.1.9.jis/configure Fri Apr 28 21:52:25 2000
@@ -566,7 +566,7 @@
fi
echo "Do you want to use SSL?"
-echo '(You need openSSL library; Please see http://www.openssl.org/)'
+echo '(You need OpenSSL library; Please see http://www.openssl.org/)'
yesno use_ssl "$use_ssl" n
echo "use_ssl=$use_ssl" >> config.param
if [ "$use_ssl" = y ]; then
@@ -582,6 +582,7 @@
if [ "$use_ssl" = y ]; then
echo "Do you want SSL verification support"
+ echo '(Your SSL library must be version 0.8 or later)'
yesno use_ssl_verify "$use_ssl_verify" n
echo "use_ssl_verify=$use_ssl_verify" >> config.param
if [ "$use_ssl_verify" = y ]; then
diff -ur w3m-0.1.9.dist/cookie.c w3m-0.1.9.jis/cookie.c
--- w3m-0.1.9.dist/cookie.c Thu Apr 20 21:47:40 2000
+++ w3m-0.1.9.jis/cookie.c Fri Apr 28 23:28:57 2000
@@ -22,6 +22,7 @@
m1,
offset;
+ /* RFC 2109 s. 2, "domain-match", case 1 (both are IP and identical) */
regexCompile("[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*", 0);
m0 = regexMatch(host, 1);
m1 = regexMatch(domain, 1);
@@ -29,6 +30,7 @@
if (strcasecmp(host, domain) == 0)
return host;
}
+ /* RFC 2109 s. 2, cases 2, 3 */
else if (!m0 && !m1) {
offset = (domain[0] != '.') ? 0 : strlen(host) - strlen(domain);
if (offset >= 0 && strcasecmp(&host[offset], domain) == 0)
@@ -37,6 +39,70 @@
return NULL;
}
+static char *
+tail_match(char *host, char *domain)
+{
+ int m0, m1, offset;
+
+ /* http://www.netscape.com/newsref/std/cookie_spec.html defined
+ "tail-matching" quite sloppily. In particular case 1 of RFC 2109
+ s. 2 was not considered. We use RFC 2109 semantics in the case
+ where IP numbers are involved. */
+
+ regexCompile("[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*",0);
+ m0 = regexMatch(host,1);
+ m1 = regexMatch(domain,1);
+ if (m0 && m1 && strcmp(host, domain) == 0) {
+ return host;
+
+ /* Version 0 "tail-matching" for non-IP-number hosts names */
+ } else if (!m0 && !m1 && !nodots(host, NULL)) {
+ if (strcasecmp(host, domain) == 0) {
+ return host;
+ } else {
+ int L_host = strlen(host);
+ int L_domain = strlen(domain);
+ if (L_host >= L_domain + 1
+ && domain[0] == '.'
+ && strcasecmp(&host[L_host - L_domain], domain) == 0)
+ /* domain actually starts with a . (i.e., same as RFC 2109) */
+ return &host[L_host - L_domain];
+ else if (L_host >= L_domain + 2
+ && host[L_host - L_domain - 1] == '.'
+ && strcasecmp(&host[L_host - L_domain], domain) == 0)
+ /* domain has no leading dot */
+ return &host[L_host - L_domain - 1];
+ }
+ }
+ return NULL;
+}
+
+static char *
+domain_or_tail_match(char *host, char *domain, int version)
+{
+ return version? domain_match(host, domain): tail_match(host, domain);
+}
+
+static char *
+fqdn( char *host, int scheme )
+{
+ /* This is a wrapper for the FQDN() function.
+
+ Note that if we are using a valid proxy, we will want to avoid calling
+ FQDN() to avoid resolving domain names twice, with the results of the
+ client-side resolution effectively discarded. We presumably only get a
+ cookie if we use HTTP or HTTPS, vs. FTP etc., so we ignore other scheme.
+ Note that we have no "HTTPS proxy" setting. */
+
+ char *domainname = NULL;
+ if (scheme == SCM_HTTP && HTTP_proxy != NULL && !Do_not_use_proxy && !check_no_proxy(host)) {
+ domainname = host;
+ } else {
+ domainname = FQDN(host);
+ }
+ return domainname;
+}
+
static struct portlist *
make_portlist(Str port)
{
@@ -126,12 +192,13 @@
static int
match_cookie(ParsedURL * pu, struct cookie *cookie)
{
- char *domainname = FQDN(pu->host);
+ char *domainname = fqdn(pu->host, pu->scheme);
+ int L_domainname;
if (!domainname)
return 0;
- if (!domain_match(domainname, cookie->domain->ptr))
+ if (!domain_or_tail_match(domainname, cookie->domain->ptr, cookie->version))
return 0;
if (strncmp(cookie->path->ptr, pu->file, cookie->path->length) != 0)
return 0;
@@ -190,6 +257,7 @@
return NULL;
tmp = Strnew();
+ /* XXX The version check is not necessary according to RFC 2109 s. 10.1.1 */
if (version > 0)
Strcat(tmp, Sprintf("$Version=\"%d\"; ", version));
@@ -197,6 +265,7 @@
for (p1 = fco->next; p1; p1 = p1->next) {
Strcat_charp(tmp, "; ");
Strcat(tmp, make_cookie(p1));
+ /* XXX The version check is not necessary according to RFC 2109 s. 10.1.1 */
if (version > 0) {
if (p1->flag & COO_PATH)
Strcat(tmp, Sprintf("; $Path=\"%s\"", p1->path->ptr));
@@ -232,13 +301,16 @@
Str port, Str commentURL)
{
struct cookie *p;
- char *domainname = FQDN(pu->host);
+ char *domainname = fqdn(pu->host, pu->scheme);
Str odomain = domain,
opath = path;
struct portlist *portlist = NULL;
+ int use_security = !(flag & COO_OVERRIDE);
+
+#define COOKIE_ERROR(err) if(!((err) & COO_OVERRIDE_OK) || use_security) return (err)
#ifdef DEBUG
- fprintf(stderr, "host: [%s, %s] %d\n", pu->host, pu->file, secure);
+ fprintf(stderr, "host: [%s, %s] %d\n", pu->host, pu->file, flag & COO_SECURE);
fprintf(stderr, "cookie: [%s=%s]\n", name->ptr, value->ptr);
fprintf(stderr, "expires: [%s]\n", asctime(gmtime(&expires)));
if (domain)
@@ -249,20 +321,24 @@
if (port)
fprintf(stderr, "port: [%s]\n", port->ptr);
#endif /* DEBUG */
+ /* RFC 2109 s. 4.3.2 case 2; but this (no request-host) shouldn't happen */
if (!domainname)
- return 1;
+ return COO_ENODOT;
if (domain) {
char *dp;
char **sdomain;
-#if 0
- if (domain->ptr[0] != '.')
- return 1;
-#endif /* 0 */
+#if 1
+ /* RFC 2109 s. 4.3.2 case 2 subcase 2 (does not apply for version 0) */
+ if (version > 0 && domain->ptr[0] != '.')
+ COOKIE_ERROR(COO_ENOTV1DOM);
+#endif /* 1 */
+ /* RFC 2109 s. 4.3.2 case 2 subcase 1 */
if (nodots(&domain->ptr[1], &domain->ptr[domain->length]))
- return 1;
- if (!(dp = domain_match(domainname, domain->ptr)))
- return 1;
+ COOKIE_ERROR(COO_ENODOT);
+ /* RFC 2109 s. 4.3.2 case 3, or version 0 "tail-matching" check */
+ if (!(dp = domain_or_tail_match(domainname, domain->ptr, version)))
+ COOKIE_ERROR(version? COO_EDOM: COO_ETAIL);
if (version == 0) {
for (sdomain = special_domain; *sdomain; sdomain++) {
int offset = domain->length - strlen(*sdomain);
@@ -270,21 +346,25 @@
break;
}
if (!*sdomain && !nodots(domainname, dp))
- return 1;
+ COOKIE_ERROR(COO_ESPECIAL);
}
else {
+ /* RFC 2109 s. 4.3.2 case 4 */
+ /* Invariant: dp contains matched domain */
if (!nodots(domainname, dp))
- return 1;
+ COOKIE_ERROR(COO_EBADHOST);
}
}
if (path) {
+ /* RFC 2109 s. 4.3.2 case 1 */
+ /* XXX Why is the version 0 check here? */
if (version > 0 && strncmp(path->ptr, pu->file, path->length) != 0)
- return 1;
+ COOKIE_ERROR(COO_EPATH);
}
if (port) {
portlist = make_portlist(port);
if (portlist && !port_match(portlist, pu->port))
- return 1;
+ COOKIE_ERROR(COO_EPORT);
}
if (!domain)
@@ -523,7 +603,11 @@
}
if (p->commentURL) {
Strcat_charp(src, "<tr><td width=\"80\"><b>CommentURL:</b></td><td>");
+ Strcat_charp(src, "<a href=\"");
+ Strcat_charp(src, htmlquote_str(p->commentURL->ptr));
+ Strcat_charp(src, "\">");
Strcat_charp(src, htmlquote_str(p->commentURL->ptr));
+ Strcat_charp(src, "</a>");
Strcat_charp(src, "</td></tr>");
}
if (tmp2[0]) {
@@ -533,6 +617,9 @@
Strcat_charp(src, " (Discard)");
Strcat_charp(src, "</td></tr>");
}
+ Strcat_charp(src, "<tr><td width=\"80\"><b>Version:</b></td><td>");
+ Strcat_charp(src, Sprintf("%d", p->version)->ptr);
+ Strcat_charp(src, "</td></tr><tr><td>");
if (p->domain) {
Strcat_charp(src, "<tr><td width=\"80\"><b>Domain:</b></td><td>");
Strcat_charp(src, htmlquote_str(p->domain->ptr));
diff -ur w3m-0.1.9.dist/etc.c w3m-0.1.9.jis/etc.c
--- w3m-0.1.9.dist/etc.c Thu Apr 20 23:45:53 2000
+++ w3m-0.1.9.jis/etc.c Fri Apr 28 21:47:18 2000
@@ -1070,6 +1070,7 @@
break;
switch (SSL_get_error(ss->handle, ss->status)) {
case SSL_ERROR_WANT_READ:
+ case SSL_ERROR_WANT_WRITE: /* see SSL_get_error(3) */
continue;
default:
break;
diff -ur w3m-0.1.9.dist/file.c w3m-0.1.9.jis/file.c
--- w3m-0.1.9.dist/file.c Thu Apr 20 23:45:53 2000
+++ w3m-0.1.9.jis/file.c Thu Apr 27 20:53:00 2000
@@ -105,6 +105,20 @@
#define HR_RULE_WIDTH 1
#endif /* not KANJI_SYMBOLS */
+#ifdef USE_COOKIE
+/* This array should be somewhere else */
+char *violations[COO_EMAX] = {
+ "tail match failed",
+ "wrong number of dots",
+ "RFC 2109 4.3.2 rule 1",
+ "RFC 2109 4.3.2 rule 2.1",
+ "RFC 2109 4.3.2 rule 2.2",
+ "RFC 2109 4.3.2 rule 3",
+ "RFC 2109 4.3.2 rule 4",
+ "RFC XXXX 4.3.2 rule 5"
+};
+#endif
+
#ifndef STRCHR
char *
strchr(char *s, char c)
@@ -489,10 +503,18 @@
p++;
SKIP_BLANKS(p);
if (matchattr(p, "expires", 7, &tmp)) {
+ /* version 0 */
expires = mymktime(tmp->ptr);
}
else if (matchattr(p, "max-age", 7, &tmp)) {
+#if 0
expires = time(NULL) + atol(tmp->ptr);
+#else
+ long max_age = atol(tmp->ptr);
+ if (max_age == 0)
+ flag |= COO_DISCARD; /* RFC 2109 ss. 4.2.1, 4.2.2 */
+ expires = time(NULL) + max_age;
+#endif
}
else if (matchattr(p, "domain", 6, &tmp)) {
domain = tmp;
@@ -510,12 +532,15 @@
version = atoi(tmp->ptr);
}
else if (matchattr(p, "port", 4, &tmp)) {
+ /* version 1, Set-Cookie2 */
port = tmp;
}
else if (matchattr(p, "commentURL", 10, &tmp)) {
+ /* version 1, Set-Cookie2 */
commentURL = tmp;
}
else if (matchattr(p, "discard", 7, NULL)) {
+ /* version 1, Set-Cookie2 */
flag |= COO_DISCARD;
}
quoted = 0;
@@ -526,15 +551,49 @@
}
}
if (pu && name->length > 0) {
+ int err;
if (flag & COO_SECURE)
disp_message_nsec("Received a secured cookie", FALSE, 1, TRUE, FALSE);
else
disp_message_nsec(Sprintf("Received cookie: %s=%s",
name->ptr, value->ptr)->ptr, FALSE, 1, TRUE, FALSE);
- if (add_cookie(pu, name, value, expires, domain, path, flag,
- comment, version, port, commentURL))
- disp_message_nsec("This cookie was rejected "
- "to prevent security violation.", FALSE, 10, TRUE, FALSE);
+ err = add_cookie(pu, name, value, expires, domain, path, flag,
+ comment, version, port, commentURL);
+ if (err) {
+ /* XXX This should probably use some other, more friendly means of
+ inputting? */
+ char *ans = accept_bad_cookie == TRUE? "y": NULL;
+ if (fmInitialized && (err & COO_OVERRIDE_OK)
+ && accept_bad_cookie == PERHAPS)
+ {
+ /* XXX inputStr will crash if the line is close to COLS. Somehow
+ we need to make sure it doesn't crash. (impossible?) Really
+ need to fix it in linein.c. */
+ char *s = Sprintf("Accept bad cookie from %s for %s? (y or n) ",
+ pu->host, domain->ptr)->ptr;
+ int L = strlen(s);
+ if (L > COLS - 4)
+ s[COLS - 4] = '\0';
+ term_raw();
+ inputStr(s, FALSE);
+ }
+ if (ans == NULL || tolower(*ans) != 'y'
+ || (err = add_cookie(pu, name, value, expires, domain, path,
+ flag | COO_OVERRIDE, comment, version, port,
+ commentURL))) {
+ err = (err & ~COO_OVERRIDE_OK) - 1;
+ if (err >= 0 && err < COO_EMAX) {
+ disp_message_nsec(Sprintf("This cookie was rejected "
+ "to prevent security violation. [%s]",
+ violations[err])->ptr, FALSE, 10, TRUE, FALSE);
+ } else {
+ disp_message_nsec("This cookie was rejected "
+ "to prevent security violation.", FALSE, 10, TRUE, FALSE);
+ }
+ } else
+ disp_message_nsec(Sprintf("Accepting invalid cookie: %s=%s",
+ name->ptr,value->ptr)->ptr, FALSE, 1, TRUE, FALSE);
+ }
}
}
#endif /* USE_COOKIE */
diff -ur w3m-0.1.9.dist/fm.h w3m-0.1.9.jis/fm.h
--- w3m-0.1.9.dist/fm.h Thu Apr 20 23:45:53 2000
+++ w3m-0.1.9.jis/fm.h Thu Apr 27 20:53:01 2000
@@ -72,6 +72,10 @@
#define FALSE 0
#define TRUE 1
+#ifdef USE_COOKIE
+#define PERHAPS 2
+#endif
+
#define INTERNAL_CODE 'E' /* use EUC-JP internally; do not change */
#define SHELLBUFFERNAME "*Shellout*"
@@ -467,6 +471,21 @@
#define COO_DOMAIN 4
#define COO_PATH 8
#define COO_DISCARD 16
+#define COO_OVERRIDE 32 /* user chose to override security checks */
+
+#define COO_OVERRIDE_OK 32 /* flag to specify that an error is overridable */
+ /* version 0 refers to the original cookie_spec.html */
+ /* version 1 refers to RFC 2109 */
+ /* version 1' refers to the Internet draft to obsolete RFC 2109 */
+#define COO_ETAIL (1 | COO_OVERRIDE_OK) /* tail match failed (version 0) */
+#define COO_ESPECIAL (2) /* special domain check failed (version 0) */
+#define COO_EPATH (3) /* Path attribute mismatch (version 1 case 1) */
+#define COO_ENODOT (4 | COO_OVERRIDE_OK) /* no embedded dots in Domain (version 1 case 2.1) */
+#define COO_ENOTV1DOM (5 | COO_OVERRIDE_OK) /* Domain does not start with a dot (version 1 case 2.2) */
+#define COO_EDOM (6 | COO_OVERRIDE_OK) /* domain-match failed (version 1 case 3) */
+#define COO_EBADHOST (7 | COO_OVERRIDE_OK) /* dot in matched host name in FQDN (version 1 case 4) */
+#define COO_EPORT (8) /* Port match failed (version 1' case 5) */
+#define COO_EMAX COO_EPORT
#endif /* USE_COOKIE */
typedef struct _Hist {
@@ -639,6 +658,7 @@
global int default_use_cookie init(TRUE);
global int use_cookie init(TRUE);
global int accept_cookie init(FALSE);
+global int accept_bad_cookie init(FALSE);
#endif /* USE_COOKIE */
#ifdef VIEW_UNSEENOBJECTS
diff -ur w3m-0.1.9.dist/rc.c w3m-0.1.9.jis/rc.c
--- w3m-0.1.9.dist/rc.c Thu Apr 20 22:53:15 2000
+++ w3m-0.1.9.jis/rc.c Thu Apr 27 20:53:02 2000
@@ -70,6 +70,7 @@
#define CMT_WRAP "折り返し検索"
#define CMT_USECOOKIE "クッキーを使用する"
#define CMT_ACCEPTCOOKIE "クッキーを受け付ける"
+#define CMT_ACCEPTBADCOOKIE "無効のクッキー" /*XXX FIXME*/
#define CMT_VIEW_UNSEENOBJECTS "背景画像等へのリンクを作る"
#define CMT_BGEXTVIEW "外部ビューアをバックグラウンドで動かす"
#define CMT_EXT_DIRLIST "ディレクトリリストに外部コマンドを使う"
@@ -132,6 +133,7 @@
#define CMT_WRAP "Wrap search"
#define CMT_USECOOKIE "Use Cookie"
#define CMT_ACCEPTCOOKIE "Accept Cookie"
+#define CMT_ACCEPTBADCOOKIE "Invalid Cookie"
#define CMT_VIEW_UNSEENOBJECTS "Display unseenobjects (e.g. bgimage) tag"
#define CMT_BGEXTVIEW "Background an external viewer"
#define CMT_EXT_DIRLIST "Use external program for directory listing"
@@ -188,6 +190,17 @@
};
#endif /* INET6 */
+#ifdef USE_COOKIE
+static char *badcookiestr[] = {
+ "0:discard",
+#if 0
+ "1:accept",
+#endif
+ "2:ask",
+ NULL,
+};
+#endif
+
struct param_section {
char *name;
struct param_ptr *params;
@@ -256,6 +269,7 @@
#ifdef USE_COOKIE
{"use_cookie", P_INT, PI_ONOFF, (void *) &use_cookie, CMT_USECOOKIE, NULL},
{"accept_cookie", P_INT, PI_ONOFF, (void *) &accept_cookie, CMT_ACCEPTCOOKIE, NULL},
+ {"accept_bad_cookie", P_INT, PI_SEL_C, (void*)&accept_bad_cookie, CMT_ACCEPTBADCOOKIE, &badcookiestr},
#endif /* USE_COOKIE */
#ifdef MOUSE
{"use_mouse", P_INT, PI_ONOFF, (void *) &use_mouse, CMT_MOUSE, NULL},
@@ -480,12 +494,21 @@
static void
parse_proxy()
{
- if (HTTP_proxy)
+ if (HTTP_proxy) {
parseURL(HTTP_proxy, &HTTP_proxy_parsed, NULL);
- if (GOPHER_proxy)
+ if (HTTP_proxy_parsed.scheme != SCM_HTTP)
+ fprintf(stderr, "Warning: HTTP proxy scheme is not HTTP!\n");
+ }
+ if (GOPHER_proxy) {
parseURL(GOPHER_proxy, &GOPHER_proxy_parsed, NULL);
- if (FTP_proxy)
+ if (GOPHER_proxy_parsed.scheme != SCM_HTTP)
+ fprintf(stderr, "Warning: GOPHER proxy scheme is not HTTP!\n");
+ }
+ if (FTP_proxy) {
parseURL(FTP_proxy, &FTP_proxy_parsed, NULL);
+ if (FTP_proxy_parsed.scheme != SCM_HTTP)
+ fprintf(stderr, "Warning: FTP proxy scheme is not HTTP!\n");
+ }
if (NO_proxy)
set_no_proxy(NO_proxy);
}
diff -ur w3m-0.1.9.dist/rcparams.h w3m-0.1.9.jis/rcparams.h
--- w3m-0.1.9.dist/rcparams.h Thu Apr 20 20:51:59 2000
+++ w3m-0.1.9.jis/rcparams.h Thu Apr 27 20:53:02 2000
@@ -44,6 +44,7 @@
global int WrapDefault init(FALSE);
#ifdef USE_COOKIE
global int use_cookie init(TRUE);
+global int accept_bad_cookie init(FALSE);
#endif /* USE_COOKIE */
/* params4 */
diff -ur w3m-0.1.9.dist/url.c w3m-0.1.9.jis/url.c
--- w3m-0.1.9.dist/url.c Thu Apr 20 22:53:15 2000
+++ w3m-0.1.9.jis/url.c Thu Apr 27 20:53:03 2000
@@ -24,6 +24,10 @@
#define write(a,b,c) write_s(a,b,c)
#endif /* __WATT32__ */
+#ifdef USE_SSL
+#include <crypto.h> /* SSLEAY_VERSION_NUMBER */
+#endif
+
#define NOPROXY_NETADDR /* allow IP address for no_proxy */
#ifdef INET6
@@ -373,6 +377,8 @@
bcopy((void *) &adr, (void *) &hostaddr.sin_addr, sizeof(long));
hostaddr.sin_family = AF_INET;
hostaddr.sin_port = s_port;
+ message(Sprintf("Connecting to %s\n", hostname)->ptr, 0, 0);
+ refresh();
if (connect(sock, (struct sockaddr *) &hostaddr,
sizeof(struct sockaddr_in)) < 0) {
#ifdef SOCK_DEBUG
----------- 8< --------------- cut here -------------- 8< --------------
-- Ambrose Li <ai337@freenet.toronto.on.ca> http://trends.ca/~acli/
"A good style should show no sign of effort; what is written should seem a happy accident." -- Somerset Maugham.
This archive was generated by hypermail 2b29 : Wed Jul 19 2000 - 10:30:43 CDT